pacman::p_load(tidyverse,data.table,broom,readxl,latex2exp,ggpubr)
rm(list=ls())
`%!in%` = Negate(`%in%`)
################################################################################

#Spatial units
spatial_id_estimation_a = 'county_id'
spatial_id_estimation_b = 'amc_id'
spatial_id_agg = 'amc_id'

df = fread(paste0('../../data/landuse/clean/geographicunits_argentina.csv.gz'))[, list(country, region, state, state_id, county_id)]
setnames(df, old=c(spatial_id_estimation_a), new=c('spatial_id'))
df$spatial_id_agg=df$spatial_id
df$microregion_id = df$spatial_id_agg
df$mesoregion_id = df$spatial_id_agg
df = df[, list(country, spatial_id, spatial_id_agg, microregion_id, mesoregion_id, state_id, region)]
df_a_u = unique(df, by = "spatial_id")

df = fread(paste0('../../data/landuse/clean/geographicunits_brazil.csv.gz'))[, list(country, region, state, state_id, mesoregion_id, microregion_id, amc_id, county_id)]
setnames(df, old=c(spatial_id_estimation_b), new=c('spatial_id'))
df$spatial_id_agg=df$spatial_id
df = df[, list(country, spatial_id, spatial_id_agg, microregion_id, mesoregion_id, state_id, region)]
df_b_u = unique(df, by = "spatial_id")

df = rbind(df_a_u,df_b_u)
df$frontier=0
frontier_list = c('NORTE','NORDESTE','NOA','NEA','PATAGONIA','CUYO')
df[region %in% frontier_list,]$frontier=1
df_cw = df

df = df_cw[, list(country, spatial_id_agg, microregion_id, mesoregion_id, state_id, region, frontier)]
df_cw_agg = unique(df, by = "spatial_id_agg")


###################### Zetat terms

df = fread(paste0("inputs/parameters_supply_across_zetat_level.csv"))
df_id = df[year %in% c(2017,2018),][,list(spatial_id, commodity, zetat, zetat_rescaled)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id, commodity)]

scale_agg_f = 0.1
scale_agg_c = 0.2
scale_agg_r = 0.3
scale_agg_st = 0.4
scale_agg_meso = 0.5 
scale_agg_micro = 0.6

for(n_commodity in c("soybean","maize","pasture","sunflower","wheat","rice","sugarcane") ){
  
  dfx = df_id[commodity==n_commodity,]
  dfy = merge(dfx,df_cw,by='spatial_id', all.y=T)
  dfy$commodity = n_commodity
  
  df_f = dfy[,c('frontier','zetat','zetat_rescaled')][, lapply(.SD, median, na.rm=TRUE), by=list(frontier)][,c('zetat_f','zetat_f_rescaled'):=list(scale_agg_f*zetat,scale_agg_f*zetat_rescaled)]
  df_c = dfy[,c('country','frontier','zetat','zetat_rescaled')][, lapply(.SD, median, na.rm=TRUE), by=list(country,frontier)][,c('zetat_c','zetat_c_rescaled'):=list(scale_agg_c*zetat,scale_agg_c*zetat_rescaled)]
  df_r = dfy[,c('region','zetat','zetat_rescaled')][, lapply(.SD, median, na.rm=TRUE), by=list(region)][,c('zetat_r','zetat_r_rescaled'):=list(scale_agg_r*zetat,scale_agg_r*zetat_rescaled)]
  df_st = dfy[,c('state_id','zetat','zetat_rescaled')][, lapply(.SD, median, na.rm=TRUE), by=list(state_id)][,c('zetat_st','zetat_st_rescaled'):=list(scale_agg_st*zetat,scale_agg_st*zetat_rescaled)]
  df_meso = dfy[,c('mesoregion_id','zetat','zetat_rescaled')][, lapply(.SD, median, na.rm=TRUE), by=list(mesoregion_id)][,c('zetat_meso','zetat_meso_rescaled'):=list(scale_agg_meso*zetat,scale_agg_meso*zetat_rescaled)]
  df_micro = dfy[,c('microregion_id','zetat','zetat_rescaled')][, lapply(.SD, median, na.rm=TRUE), by=list(microregion_id)][,c('zetat_micro','zetat_micro_rescaled'):=list(scale_agg_micro*zetat,scale_agg_micro*zetat_rescaled)]
  
  df = merge(dfy, df_micro[,c('microregion_id','zetat_micro','zetat_micro_rescaled')], by='microregion_id', all.x=T)
  df = merge(df, df_meso[,c('mesoregion_id','zetat_meso','zetat_meso_rescaled')], by='mesoregion_id',  all.x=T)
  df = merge(df, df_st[,c('state_id','zetat_st','zetat_st_rescaled')], by='state_id',  all.x=T)
  df = merge(df, df_r[,c('region','zetat_r','zetat_r_rescaled')], by='region',  all.x=T)
  df = merge(df, df_c[,c('country','frontier','zetat_c','zetat_c_rescaled')], by=c('country','frontier'),  all.x=T)
  df = merge(df, df_f[,c('frontier','zetat_f','zetat_f_rescaled')], by=c('frontier'),  all.x=T)
  
  df[is.na(zetat)]$zetat  = df[is.na(zetat)]$zetat_micro
  df[is.na(zetat)]$zetat  = df[is.na(zetat)]$zetat_meso
  df[is.na(zetat)]$zetat  = df[is.na(zetat)]$zetat_st 
  df[is.na(zetat)]$zetat  = df[is.na(zetat)]$zetat_r 
  df[is.na(zetat)]$zetat  = df[is.na(zetat)]$zetat_c 
  df[is.na(zetat)]$zetat  = df[is.na(zetat)]$zetat_f 
  
  df[is.na(zetat_rescaled)]$zetat_rescaled  = df[is.na(zetat_rescaled)]$zetat_micro_rescaled
  df[is.na(zetat_rescaled)]$zetat_rescaled  = df[is.na(zetat_rescaled)]$zetat_meso_rescaled
  df[is.na(zetat_rescaled)]$zetat_rescaled  = df[is.na(zetat_rescaled)]$zetat_st_rescaled
  df[is.na(zetat_rescaled)]$zetat_rescaled  = df[is.na(zetat_rescaled)]$zetat_r_rescaled
  df[is.na(zetat_rescaled)]$zetat_rescaled  = df[is.na(zetat_rescaled)]$zetat_c_rescaled 
  df[is.na(zetat_rescaled)]$zetat_rescaled  = df[is.na(zetat_rescaled)]$zetat_f_rescaled 
  
  df[,c("zetat_micro","zetat_meso","zetat_st","zetat_r","zetat_c","zetat_f","zetat_micro_rescaled","zetat_meso_rescaled","zetat_st_rescaled","zetat_r_rescaled","zetat_c_rescaled","zetat_f_rescaled"):=c(NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)]

  if(n_commodity=='soybean'){df_zetat = df}else{df_zetat=rbind(df_zetat,df)}

}

df_zetat = df_zetat[,c('commodity','spatial_id_agg','zetat', 'zetat_rescaled')][, lapply(.SD, mean, na.rm=TRUE), by=list(commodity, spatial_id_agg)]
setnames(df_zetat, old='spatial_id_agg', new='spatial_id')
df_zetat = df_zetat[!is.na(zetat),]
write.csv(df_zetat, gzfile(paste0("inputs/parameters_supply_across_zetat_level_curated.csv.gz")), row.names = FALSE) 


###################### AN terms

df = fread(paste0("inputs/parameters_supply_AN.csv"))
df_id = df[year %in% c(2017,2018)][,list(spatial_id, AN, log_AN)][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id)]

scale_agg_AN_f = 1.6
scale_agg_AN_c = 1.5
scale_agg_AN_r = 1.4
scale_agg_AN_st = 1.3
scale_agg_AN_meso = 1.2
scale_agg_AN_micro = 1.1

dfy = merge(df_id,df_cw,by='spatial_id', all.y=T)
df_f = dfy[,c('frontier','AN','log_AN')][, lapply(.SD, median, na.rm=TRUE), by=list(frontier)][,c('AN_f','log_AN_f'):=list(scale_agg_AN_f*AN,scale_agg_AN_f*log_AN)]
df_c = dfy[,c('country','frontier','AN','log_AN')][, lapply(.SD, median, na.rm=TRUE), by=list(country,frontier)][,c('AN_c','log_AN_c'):=list(scale_agg_AN_c*AN,scale_agg_AN_c*log_AN)]
df_r = dfy[,c('region','AN','log_AN')][, lapply(.SD, median, na.rm=TRUE), by=list(region)][,c('AN_r','log_AN_r'):=list(scale_agg_AN_r*AN,scale_agg_AN_r*log_AN)]
df_st = dfy[,c('state_id','AN','log_AN')][, lapply(.SD, median, na.rm=TRUE), by=list(state_id)][,c('AN_st','log_AN_st'):=list(scale_agg_AN_st*AN,scale_agg_AN_st*log_AN)]
df_meso = dfy[,c('mesoregion_id','AN','log_AN')][, lapply(.SD, median, na.rm=TRUE), by=list(mesoregion_id)][,c('AN_meso','log_AN_meso'):=list(scale_agg_AN_meso*AN,scale_agg_AN_meso*log_AN)]
df_micro = dfy[,c('microregion_id','AN','log_AN')][, lapply(.SD, median, na.rm=TRUE), by=list(microregion_id)][,c('AN_micro','log_AN_micro'):=list(scale_agg_AN_micro*AN,scale_agg_AN_micro*log_AN)]

df = merge(dfy, df_micro[,c('microregion_id','AN_micro','log_AN_micro')], by='microregion_id', all.x=T)
df = merge(df, df_meso[,c('mesoregion_id','AN_meso','log_AN_meso')], by='mesoregion_id',  all.x=T)
df = merge(df, df_st[,c('state_id','AN_st','log_AN_st')], by='state_id',  all.x=T)
df = merge(df, df_r[,c('region','AN_r','log_AN_r')], by='region',  all.x=T)
df = merge(df, df_c[,c('country','frontier','AN_c','log_AN_c')], by=c('country','frontier'),  all.x=T)
df = merge(df, df_f[,c('frontier','AN_f','log_AN_f')], by=c('frontier'),  all.x=T)

df[is.na(AN)]$AN  = df[is.na(AN)]$AN_micro
df[is.na(AN)]$AN  = df[is.na(AN)]$AN_meso
df[is.na(AN)]$AN  = df[is.na(AN)]$AN_st 
df[is.na(AN)]$AN  = df[is.na(AN)]$AN_r 
df[is.na(AN)]$AN  = df[is.na(AN)]$AN_c 
df[is.na(AN)]$AN  = df[is.na(AN)]$AN_f 

df[is.na(log_AN)]$log_AN  = df[is.na(log_AN)]$log_AN_micro
df[is.na(log_AN)]$log_AN  = df[is.na(log_AN)]$log_AN_meso
df[is.na(log_AN)]$log_AN  = df[is.na(log_AN)]$log_AN_st
df[is.na(log_AN)]$log_AN  = df[is.na(log_AN)]$log_AN_r
df[is.na(log_AN)]$log_AN  = df[is.na(log_AN)]$log_AN_c 
df[is.na(log_AN)]$log_AN  = df[is.na(log_AN)]$log_AN_f 

df[,c("AN_micro","AN_meso","AN_st","AN_r","AN_c","AN_f","log_AN_micro","log_AN_meso","log_AN_st","log_AN_r","log_AN_c","log_AN_f"):=c(NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)]
df = df[,c('spatial_id_agg','AN','log_AN')][, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id_agg)]
setnames(df, old='spatial_id_agg', new='spatial_id')

max_var = quantile(df$AN,0.975)
min_var = quantile(df$AN,0.025)
df[df$AN>max_var,]$AN = max_var
df[df$AN<min_var,]$AN = min_var

max_var = quantile(df$log_AN,0.975)
min_var = quantile(df$log_AN,0.025)
df[df$log_AN>max_var,]$log_AN = max_var
df[df$log_AN<min_var,]$log_AN = min_var

df_AN=df
write.csv(df_AN, gzfile(paste0("inputs/parameters_supply_AN_curated.csv.gz")), row.names = FALSE)
